# Data preparation: pathways-merged-intermediate.Rdata

# clear all objects (including hidden, dot-prefixed ones) from the workspace
# NOTE(review): wiping the global environment inside a script is discouraged;
# kept here so the script's side effects stay unchanged.
rm(list = ls(all.names = TRUE))

# load packages
library(foreign)

# read in the raw survey export (semicolon-separated .csv file)
# NOTE(review): the working directory is hard-coded; the relative paths used
# for the SPSS file and the saved .Rdata file further down depend on it.
setwd("P:/2017-pathways/new/2-data")
# keep text columns as character vectors rather than factors; the argument
# name is spelled out in full instead of relying on partial matching
rawdata <- read.csv2("pathways-text-data.csv",
                     header = TRUE,
                     sep = ";",
                     stringsAsFactors = FALSE)

# add treatment: code which open-answer column was filled in
#   r7km1a non-empty -> 1, r7km1b non-empty -> 2, r7km1c non-empty -> 3
# Rows with no non-empty answer stay NA. If several columns are non-empty,
# the later assignment wins. which() drops NA comparisons, so missing
# values never select a row.
rawdata$treatment <- NA_real_
rawdata$treatment[which(rawdata$r7km1a != "")] <- 1
rawdata$treatment[which(rawdata$r7km1b != "")] <- 2
rawdata$treatment[which(rawdata$r7km1c != "")] <- 3
rawdata$treatment <- as.factor(rawdata$treatment)

# continue under the name `data` and drop the raw copy
data <- rawdata
rm(rawdata)

# combine the three open-answer columns into one text field (space-separated)
# NOTE(review): empty columns leave padding spaces in the result, and any NA
# entries would be pasted as the string "NA" - confirm downstream handling.
data[["openanswer"]] <- with(data, paste(r7km1a, r7km1b, r7km1c, sep = " "))
# report the number of combined answers
length(data[["openanswer"]])
# keep an untouched copy of the combined text
data[["openanswer.orig"]] <- data[["openanswer"]]

# merge with background variables from the SPSS panel file
# (value labels are not converted to factors; result is a data frame)
panel <- read.spss(
  file = "Norwegian Citizen Panel - wave 1-9 NO.sav",
  use.value.labels = FALSE,
  to.data.frame = TRUE,
  trim.factor.names = TRUE
)

# columns to carry over; responseid is the merge key
myvars <- c(
  "responseid",
  "r7P1", "r7P2", "r7P3", "r7P4_1", "r7P4_2", "r7P5_1",
  "r8B2_2", "r7municipalSize",
  "w01_k24", "w03_r3k24", "r4k24", "r5km11"
)
background <- panel[myvars]
# integer copy of the respondent id
background[["serial"]] <- as.integer(background[["responseid"]])

# full outer join on responseid: unmatched rows from either side are kept
data <- merge(data, background, by = "responseid", all = TRUE)

# the panel objects are no longer needed
rm(panel, background)

# Store the merged intermediate data set (object `data`); mainly used for the
# table on key words (before stop word removal)
save(list = "data", file = "pathways-merged-intermediate.Rdata")